########################################################################################
#####                                                                             ######
#####   Input: Debates data; sentence-level topic scores; debate topic measures   ######
#####   Output: MP-by-debate data for analysis                                    ######          
#####                                                                             ######
########################################################################################

# Standardise a numeric vector: subtract its mean and divide by its standard
# deviation, both computed with missing values removed.
#
# x: numeric vector (may contain NA).
# Returns a vector of the same length as x; NA entries stay NA.
#
# `na.rm = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned, which would silently switch NA removal off.
stdize <- function(x) {
  (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE)
}

# Load libraries

library(quanteda) # v3.3.1
library(data.table) # v1.14.8
library(plyr) # v1.8.9

# Load data

# Input files, loaded in this order; each load() restores the objects stored
# in the file into the current environment.
input_files <- c(
  "data/debates.Rdata", # Debate information
  "working/dictionaries_sentence_issues.Rdata", # Sentence-level issue dictionary scores
  "working/validation/debate_title_scores.Rdata", # Debate title scores (issues)
  "working/dictionaries_sentence_liwc.Rdata", # Sentence-level women dictionary scores
  "working/validation/debate_title_scores_liwc.Rdata" # Debate title scores (women)
)

for (input_file in input_files) {
  load(input_file)
}

## Convert data to individual-by-debate

# Topic score columns to aggregate; the output keeps these names and order.
glove_topics <- c(
  "glove_defence", "glove_economy", "glove_agriculture", "glove_health",
  "glove_children", "glove_education", "glove_social", "glove_trade",
  "glove_environment", "glove_crime", "glove_transport"
)

# For every person_id x section_id group, average the per-word topic score
# (score / n_words) over the group's rows, weighting each row by n_words.
individual_in_debate_scores <- dictionary_scores_issues[
  ,
  lapply(
    .SD,
    function(topic_score, words) weighted.mean(topic_score / words, words),
    words = n_words
  ),
  by = list(person_id, section_id),
  .SDcols = glove_topics
]

# Per person_id x section_id: total dic_woman count divided by total n_words.
individual_in_debate_liwc <- dictionary_scores_liwc[
  ,
  .(dic_woman = sum(dic_woman) / sum(n_words)),
  by = c("person_id", "section_id")
]

## Move debate meta data to MP-by-debate

# Output column name -> column of `debates` it is taken from.
general_columns <- c(
  name = "name",
  parent = "parent",
  party = "party_short",
  gov_mp = "gov_mp",
  is_gov_minister = "is_gov_minister",
  is_opp_minister = "is_opp_minister",
  attends_cabinet = "attends_cabinet",
  attends_shadow_cabinet = "attends_shadow_cabinet",
  has_degree = "has_degree",
  highest_education = "highest_education",
  occupation_class = "occupation_class",
  occupation = "occupation_new",
  marginality = "margin",
  years_in_parliament = "years_in_parliament",
  age_years = "age_years",
  is_committee_chair = "is_committee_chair",
  is_speaker = "is_speaker",
  gender = "gender",
  cohort = "cohort",
  question_time = "question_time",
  prop_women = "prop_women",
  female_leader_present = "female_leader_present",
  pm_question_time = "pm_question_time",
  year = "year",
  session = "session",
  yearmon = "yearmon",
  hdate = "hdate",
  parliamentary_term = "parliamentary_term",
  procedure = "procedure",
  legislation = "legislation",
  backbench_opposition = "backbench_opposition_day",
  joiners = "joiners",
  leavers = "leavers",
  debate_type = "debate_type",
  petitions = "petitions"
)

# Output topic label -> topic prefix used in the `debates` column names.
# The source spelling "agriculure" is kept as-is: it is the column name read
# from `debates`.
topic_prefixes <- c(
  defence = "defence",
  economy = "economy",
  agriculture = "agriculure",
  health = "health",
  family = "children",
  education = "education",
  welfare = "social_welfare",
  trade = "trade",
  environment = "environment",
  energy = "energy",
  crime = "crime",
  transport = "transport",
  international_affairs = "international_affairs",
  civil_rights = "civil_rights",
  women = "women"
)

# <topic>_committee <- <prefix>_comm
committee_columns <- setNames(
  paste0(topic_prefixes, "_comm"),
  paste0(names(topic_prefixes), "_committee")
)

# <topic>_minister_gov / _opp <- <prefix>_secretary_gov / _opp, with gov and
# opp interleaved per topic.
sides <- c("gov", "opp")
minister_columns <- setNames(
  paste0(rep(topic_prefixes, each = 2), "_secretary_", sides),
  paste0(rep(names(topic_prefixes), each = 2), "_minister_", sides)
)

metadata_columns <- c(general_columns, committee_columns, minister_columns)

# Collapse each column to its unique value(s) within every person_id x
# section_id group, then rename to the output names.
# NOTE(review): presumably every column is constant within a group, giving one
# row per MP-debate - confirm, since a varying column would yield more than
# one value for that group.
mp_by_debate <- debates[
  ,
  lapply(.SD, unique),
  by = list(person_id, section_id),
  .SDcols = unname(metadata_columns)
]
setnames(
  mp_by_debate,
  old = unname(metadata_columns),
  new = names(metadata_columns)
)

# Join topic scores, MP/debate meta data and the women-dictionary score on the
# MP-by-debate keys (merge's default keeps only rows present in both inputs).
mp_debate_keys <- c("person_id", "section_id")
speech_scores <- merge(
  individual_in_debate_scores,
  mp_by_debate,
  by = mp_debate_keys
)
speech_scores <- merge(
  speech_scores,
  individual_in_debate_liwc,
  by = mp_debate_keys
)

# Create dictionary titles (issues)

# Flag a debate title for a topic when its dictionary score is positive. The
# comparison already yields TRUE/FALSE (NA where the score is NA).
debate_title_scores$defence_debate <- debate_title_scores$dic_defence > 0
debate_title_scores$economy_debate <- debate_title_scores$dic_economy > 0
debate_title_scores$agriculture_debate <- debate_title_scores$dic_agriculture > 0
debate_title_scores$health_debate <- debate_title_scores$dic_health > 0
debate_title_scores$children_debate <- debate_title_scores$dic_children > 0
debate_title_scores$education_debate <- debate_title_scores$dic_education > 0
# The welfare flag is derived from the "social" dictionary score.
debate_title_scores$welfare_debate <- debate_title_scores$dic_social > 0
debate_title_scores$trade_debate <- debate_title_scores$dic_trade > 0
debate_title_scores$environment_debate <- debate_title_scores$dic_environment > 0
debate_title_scores$crime_debate <- debate_title_scores$dic_crime > 0
debate_title_scores$transport_debate <- debate_title_scores$dic_transport > 0

# Topic flag columns carried over to the debate-level table, in output order.
title_flag_columns <- paste0(
  c(
    "defence", "economy", "agriculture", "health", "children", "education",
    "welfare", "trade", "environment", "crime", "transport"
  ),
  "_debate"
)

# Reduce the title flags to their unique value(s) per section_id.
debate_titles <- debate_title_scores[
  ,
  lapply(.SD, unique),
  by = section_id,
  .SDcols = title_flag_columns
]

# Attach the debate-level flags to the MP-by-debate scores.
speech_scores <- merge(speech_scores, debate_titles, by = "section_id")

# Create dictionary titles (liwc)

# Flag a debate title as women-related when its dic_woman score is positive.
debate_title_scores_liwc$women_debate <- debate_title_scores_liwc$dic_woman > 0

# Reduce the flag to its unique value(s) per section_id.
debate_titles_liwc <- debate_title_scores_liwc[
  ,
  .(women_debate = unique(women_debate)),
  by = "section_id"
]

speech_scores <- merge(speech_scores, debate_titles_liwc, by = "section_id")

# Keep a handle on the merged table as it stands at this point.
speech_scores_issues <- speech_scores

# Standardise variables

# Standardised output column -> raw score column it is computed from.
standardised_from <- c(
  defence_std = "glove_defence",
  economy_std = "glove_economy",
  agriculture_std = "glove_agriculture",
  health_std = "glove_health",
  children_std = "glove_children",
  education_std = "glove_education",
  social_std = "glove_social",
  trade_std = "glove_trade",
  environment_std = "glove_environment",
  crime_std = "glove_crime",
  transport_std = "glove_transport",
  women_dic_std = "dic_woman"
)

# Work on an explicit copy so that other names bound to the same table
# (e.g. speech_scores_issues) are not changed by the by-reference set() below.
speech_scores <- copy(speech_scores)
for (std_column in names(standardised_from)) {
  raw_column <- standardised_from[[std_column]]
  set(
    speech_scores,
    j = std_column,
    value = stdize(speech_scores[[raw_column]])
  )
}

## Drop unnecessary variables

# Raw score columns that are no longer needed once standardised.
raw_score_columns <- c(
  "glove_defence", "glove_economy", "glove_agriculture", "glove_health",
  "glove_children", "glove_education", "glove_social", "glove_trade",
  "glove_environment", "glove_crime", "glove_transport", "dic_woman"
)

# Copy first so the by-reference deletion cannot touch any other name bound to
# the same table, then remove all raw columns in one call.
speech_scores <- copy(speech_scores)
set(speech_scores, j = raw_score_columns, value = NULL)

# Save speech_scores

# Write the final MP-by-debate table to disk under its own object name.
save(list = "speech_scores", file = "working/speech_scores.Rdata")
